My goal is to use interactive visualizations to find patterns in Instagram posts related to fashion brands.
The data was obtained from the project https://arxiv.org/abs/1704.04137. In that project the authors collected 24,752 Instagram posts by 13,350 people. The data collection was done over a one-month period in January 2015, and in all the posts renowned fashion brands are named in the hashtags.
The data includes:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import altair as alt
from altair import *
from ipywidgets import widgets, interact, interactive, fixed, interact_manual
from IPython.display import display
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
#import re
from string import digits
import spacy
import en_core_web_sm
import nltk
from nltk.corpus import stopwords
# Bootstrap the NLP resources: NLTK corpora plus the spaCy English pipeline.
print('Loading words, spacy, punktd, stopwords')

# The 'words' corpus has to be downloaded before it is read into `words`.
nltk.download('words')

# spaCy pipeline loaded from the en_core_web_sm model package.
nlp = en_core_web_sm.load()

# Entries of the NLTK 'words' corpus, materialised as a set.
words = set(nltk.corpus.words.words())

# Remaining NLTK resources: the stop-word lists, then the punkt tokenizer data.
for _resource in ('stopwords', 'punkt'):
    nltk.download(_resource)

print('done, now loading text and basic formating of columns name')
def _normalize_labels(labels):
    """Return *labels* cleaned into lower-case, underscore-separated text.

    Accepts any pandas object exposing the ``.str`` accessor (an ``Index`` or
    a ``Series``). Surrounding whitespace is stripped, text is lower-cased,
    spaces and hyphens become underscores, and question marks are removed.
    """
    # regex=False makes every pattern a literal string no matter which default
    # the installed pandas version uses; a bare '?' is not a valid regex.
    return (
        labels.str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('-', '_', regex=False)
        .str.replace('?', '', regex=False)
    )


# Read dataset and format texts
df = pd.read_csv('fashion data on instagram.csv', index_col=0)

# Normalise the header first so the columns below can be addressed by their
# cleaned names.
df.columns = _normalize_labels(df.columns)

# Apply the same clean-up to the values of the two brand label columns.
# Bracket assignment is used because it always targets a DataFrame column.
for _column in ('brandname', 'brandcategory'):
    df[_column] = _normalize_labels(df[_column])

print(df.columns)
print('ready!!')
# Turn off Altair's row limit so the filtered DataFrame can be passed to the chart.
alt.data_transformers.disable_max_rows()

# Usage hints printed above the chart.
for _hint in (
    'Altair interactive plot',
    'click in the brandcategory to highlight the point in the category',
    'mouse over a data point to see the brand name',
    'scroll over a region to zoom in.',
):
    print(_hint)

# Single selection keyed on brandcategory; with nothing selected, all points match.
selector = alt.selection_single(empty='all', fields=['brandcategory'])

# Selected points are coloured by category, the rest fall back to light gray.
colours_obj = alt.Color('brandcategory:N')
colours_condition = alt.condition(selector, colours_obj, alt.value("lightgray"))

# Keep only posts inside the plotted window.
_in_window = (df['followers'] <= 500000) & (df['likes'] <= 20000)

# Encodings: followers on x, likes on y, point size scaled by comment count.
_x_followers = alt.X('followers:Q', scale=alt.Scale(domain=(-5, 500000)))
_y_likes = alt.Y('likes:Q', scale=alt.Scale(domain=(-5, 20000)))
_size_comments = alt.Size('comments:Q', scale=alt.Scale(range=(20, 500)))

_scatter = alt.Chart(df[_in_window]).mark_point().encode(
    _x_followers,
    _y_likes,
    size=_size_comments,
    color=colours_condition,
    tooltip=['brandname:N', 'followers', 'likes', 'comments'],
)

# Left as a bare trailing expression so a notebook renders it as the cell output.
_scatter.interactive().add_selection(selector)